/**
 * 
 */
package com.reptile.core.webmagic.action;

import java.util.regex.Pattern;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} that crawls a single Taobao open-platform
 * page and prints the part of the page URL that matches the start URL.
 *
 * @author wangzihang
 */
public class WebmagicAction implements PageProcessor {

	/** The one URL this crawler is seeded with and looks for in {@link #process(Page)}. */
	private static final String START_URL =
			"http://open.taobao.com/apitools/apiPropTools.htm?spm=0.0.0.0.mlPbbQ";

	/**
	 * {@link #START_URL} quoted as a literal regex. The raw URL contains the
	 * regex metacharacters '?' and '.', so using it unquoted as a pattern
	 * ("htm?spm" meaning "ht", optional "m", then "spm") can never match the
	 * URL itself.
	 */
	private static final String START_URL_REGEX = Pattern.quote(START_URL);

	/** Number of worker threads handed to the spider. */
	private static final int THREAD_COUNT = 5;

	// Crawl configuration for the target site: charset, retry count and a
	// 1 second (1000 ms) sleep time between fetches.
	private final Site site = Site.me().setCharset("utf8").setRetryTimes(3).setSleepTime(1000);

	/**
	 * Returns the site configuration used by this processor.
	 *
	 * @return the configured {@link Site}
	 * @see us.codecraft.webmagic.processor.PageProcessor#getSite()
	 */
	@Override
	public Site getSite() {
		return site;
	}

	/**
	 * Selects the portion of the page URL that literally matches
	 * {@link #START_URL} and prints it to standard output. When the URL does
	 * not contain the start URL the selected value is printed as-is
	 * (presumably {@code null} - confirm against the WebMagic version in use).
	 *
	 * @param page the downloaded page handed over by the spider
	 * @see us.codecraft.webmagic.processor.PageProcessor#process(us.codecraft.webmagic.Page)
	 */
	@Override
	public void process(Page page) {
		String dataId = page.getUrl().regex(START_URL_REGEX).get();
		System.out.println("dataId: " + dataId);
	}

	/**
	 * Runs the crawler against {@link #START_URL} with {@link #THREAD_COUNT}
	 * threads and prints the elapsed wall-clock time in whole seconds.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		WebmagicAction processor = new WebmagicAction();
		System.out.println("开始爬取...");

		long startTime = System.currentTimeMillis();
		// run() is invoked on the calling thread, so the end timestamp is
		// taken only after it returns.
		Spider.create(processor).addUrl(START_URL).thread(THREAD_COUNT).run();
		long endTime = System.currentTimeMillis();

		System.out.println("爬取结束，耗时约" + ((endTime - startTime) / 1000) + "秒");
	}

}
